Task: Your task is to create a presentation based on the data in the dataset tab. The presentation you create will explore some observations which aren't obvious at first glance and derive insights which aren't expected. Preqin's goal is to understand how Real Estate data can tell an interesting story about where Investors should invest their money.
import pandas as pd
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pprint
# loading the file
df = pd.read_excel('/Users/shaunlim/Documents/JupyterNotebooks/Preqin Case Study Interview/Property Data Test - (SL).xlsx', sheet_name="dataset")
df.head()
| UK region | Postcode | Avg asking price | Avg asking rent (pm) | Sales per month | Avg. bedrooms | Avg. Population | |
|---|---|---|---|---|---|---|---|
| 0 | East Midlands | DE1 | 142346.0 | 628.0 | 12.0 | 2.0 | 14438 |
| 1 | East Midlands | DE3 | 267574.0 | 775.0 | 21.0 | 3.0 | 17094 |
| 2 | East Midlands | DE4 | 310718.0 | 757.0 | 50.0 | 3.0 | 33461 |
| 3 | East Midlands | DE5 | 211534.0 | 624.0 | 20.0 | 3.0 | 22075 |
| 4 | East Midlands | DE6 | 388940.0 | NaN | 30.0 | 3.0 | 25599 |
print('SHAPE:')
print(df.shape)
print('')
print('DATATYPES:')
print(df.info())
# Data types are approriate for this analysis
SHAPE: (2185, 7) DATATYPES: <class 'pandas.core.frame.DataFrame'> RangeIndex: 2185 entries, 0 to 2184 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 UK region 2185 non-null object 1 Postcode 2185 non-null object 2 Avg asking price 2164 non-null float64 3 Avg asking rent (pm) 1240 non-null float64 4 Sales per month 2173 non-null float64 5 Avg. bedrooms 2183 non-null float64 6 Avg. Population 2185 non-null int64 dtypes: float64(4), int64(1), object(2) memory usage: 119.6+ KB None
df.describe()
| Avg asking price | Avg asking rent (pm) | Sales per month | Avg. bedrooms | Avg. Population | |
|---|---|---|---|---|---|
| count | 2.164000e+03 | 1240.000000 | 2173.000000 | 2183.000000 | 2185.000000 |
| mean | 3.168718e+05 | 986.191935 | 26.643350 | 2.852955 | 25581.333638 |
| std | 1.657810e+05 | 411.884088 | 16.653253 | 0.378015 | 16062.670132 |
| min | 5.241200e+04 | 394.000000 | 2.000000 | 0.000000 | 17.000000 |
| 25% | 2.061005e+05 | 691.000000 | 14.000000 | 3.000000 | 13497.000000 |
| 50% | 2.854595e+05 | 910.000000 | 24.000000 | 3.000000 | 23521.000000 |
| 75% | 3.888515e+05 | 1188.000000 | 36.000000 | 3.000000 | 34510.000000 |
| max | 2.147277e+06 | 3765.000000 | 119.000000 | 4.000000 | 153811.000000 |
df['Postcode'].duplicated().sum()
0
missing_values_count = df.isnull().sum()
missing_values_count
UK region 0 Postcode 0 Avg asking price 21 Avg asking rent (pm) 945 Sales per month 12 Avg. bedrooms 2 Avg. Population 0 dtype: int64
df[df['Sales per month'].isnull()]
| UK region | Postcode | Avg asking price | Avg asking rent (pm) | Sales per month | Avg. bedrooms | Avg. Population | |
|---|---|---|---|---|---|---|---|
| 724 | North East | LS3 | 159082.0 | 709.0 | NaN | 2.0 | 4941 |
| 767 | North East | NE19 | 271150.0 | NaN | NaN | 3.0 | 2144 |
| 792 | North East | NE44 | NaN | NaN | NaN | 4.0 | 1073 |
| 1023 | North West | L2 | 133574.0 | 752.0 | NaN | 1.0 | 935 |
| 1048 | North West | L27 | 121139.0 | NaN | NaN | 3.0 | 6415 |
| 1081 | North West | M2 | 259444.0 | NaN | NaN | NaN | 87 |
| 1095 | North West | M17 | NaN | NaN | NaN | NaN | 17 |
| 1642 | South West | GL9 | NaN | NaN | NaN | 3.0 | 2587 |
| 1693 | South West | PL33 | NaN | NaN | NaN | 3.0 | 1911 |
| 1925 | Wales | SA47 | NaN | NaN | NaN | 3.0 | 1136 |
| 1964 | West Midlands | B2 | 169170.0 | NaN | NaN | 0.0 | 653 |
| 1966 | West Midlands | B4 | 262284.0 | 873.0 | NaN | 2.0 | 4332 |
df[df['Avg. bedrooms'].isnull()]
| UK region | Postcode | Avg asking price | Avg asking rent (pm) | Sales per month | Avg. bedrooms | Avg. Population | |
|---|---|---|---|---|---|---|---|
| 1081 | North West | M2 | 259444.0 | NaN | NaN | NaN | 87 |
| 1095 | North West | M17 | NaN | NaN | NaN | NaN | 17 |
df[df['Avg asking price'].isnull() & df['Avg asking rent (pm)'].isnull()].shape
(21, 7)
df_1 = df
df_1 = df_1.dropna(subset=['Sales per month'])
df_1 = df_1.dropna(subset=['Avg. bedrooms'])
df_1 = df_1.dropna(subset=['Avg asking price', 'Avg asking rent (pm)'], how='all')
df_1.shape
# dropped 28 rows
(2157, 7)
plt.figure(figsize=(12, 8))
df_1.hist(bins=30, figsize=(15, 10))
plt.show()
<Figure size 1200x800 with 0 Axes>
missing_values_count = df_1.isnull().sum()
missing_values_count
UK region 0 Postcode 0 Avg asking price 0 Avg asking rent (pm) 920 Sales per month 0 Avg. bedrooms 0 Avg. Population 0 dtype: int64
def weighted_average_salesPM(x):
return np.average(x, weights=df_1.loc[x.index, 'Sales per month'])
def weighted_average_population(x):
x = x.dropna()
if len(x) == 0:
return np.nan
return np.average(x, weights=df_1.loc[x.index, 'Avg. Population'])
df_1_pivot = df_1.pivot_table(index='UK region',
values=['Postcode', 'Avg asking price', 'Avg asking rent (pm)', 'Avg. Population', 'Sales per month', 'Avg. bedrooms'],
aggfunc={
'Postcode': 'count',
'Avg asking price':weighted_average_salesPM,
'Avg asking rent (pm)': weighted_average_population,
'Avg. Population':'sum',
'Sales per month':'sum',
'Avg. bedrooms':'mean',
}
, dropna=False
, margins=True
)
df_1_pivot.rename(columns={'Avg asking rent (pm)': 'Weighted Avg asking rent (pm) (based on population in district)', 'Avg asking price': 'Weighted avg asking price (based on sales in district)',
'Avg. Population': 'Population', 'Postcode': 'Num of Postcodes'}, inplace=True)
df_1_pivot
# Data doens't contain Scotland, Northern Ireland and doesn't segment for Yorkshire and the Humber.
| Weighted avg asking price (based on sales in district) | Weighted Avg asking rent (pm) (based on population in district) | Population | Avg. bedrooms | Num of Postcodes | Sales per month | |
|---|---|---|---|---|---|---|
| UK region | ||||||
| East Midlands | 221605.440438 | 683.567820 | 5277126 | 2.907407 | 162 | 4936.0 |
| East of England | 349530.465820 | 976.900844 | 5678781 | 2.901709 | 234 | 7007.0 |
| Greater London | 536042.332972 | 1528.803782 | 9484929 | 2.547445 | 274 | 8298.0 |
| North East | 202115.093784 | 634.181806 | 5090957 | 2.873832 | 214 | 4585.0 |
| North West | 213862.110842 | 686.568412 | 8261286 | 2.877966 | 295 | 7840.0 |
| South East | 377758.398592 | 1076.648193 | 7577168 | 2.876190 | 315 | 9802.0 |
| South West | 319675.908471 | 951.452885 | 5408633 | 2.911439 | 271 | 7189.0 |
| Wales | 224486.460421 | 773.361507 | 3123807 | 2.976879 | 173 | 2754.0 |
| West Midlands | 255166.589598 | 750.979474 | 5934461 | 2.899543 | 219 | 5441.0 |
| All | 321544.551891 | 982.707732 | 55837148 | 2.854427 | 2157 | 57852.0 |
# importing additional data
districtname = pd.read_csv('/Users/shaunlim/Documents/JupyterNotebooks/Preqin Case Study Interview/Postcode districts.csv')
# https://www.doogal.co.uk/PostcodeDownloads
extra_propertydata = pd.read_csv('/Users/shaunlim/Documents/JupyterNotebooks/Preqin Case Study Interview/Propertydata_new_data.csv')
# https://propertydata.co.uk/postcode-data
extra_propertydata.head()
| Postcode | Avg asking price | Avg blended £/sqft | Sales per month | Sales Turnover (sale) | Avg asking rent (pm) | Avg yield | 3yr house price growth | Crime rate | Avg. household income | Social rent | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | AL1 | 526969.0 | 620.0 | 25.0 | 0.04 | 1619.0 | 0.037 | 0.07 | 104.0 | 61500 | 0.13 |
| 1 | AL2 | 588134.0 | 516.0 | 13.0 | 0.06 | 1634.0 | 0.033 | 0.08 | 81.0 | 51800 | 0.10 |
| 2 | AL3 | 658496.0 | 581.0 | 18.0 | 0.10 | 1937.0 | 0.035 | 0.03 | 83.0 | 59000 | 0.06 |
| 3 | AL4 | 682134.0 | 579.0 | 14.0 | 0.09 | 1601.0 | 0.028 | 0.20 | 55.0 | 56200 | 0.06 |
| 4 | AL5 | 760869.0 | 635.0 | 22.0 | 0.12 | 1900.0 | 0.030 | 0.06 | 49.0 | 63200 | 0.04 |
districtname[['Postcode', 'UK region']].head()
| Postcode | UK region | |
|---|---|---|
| 0 | AB1 | Scotland |
| 1 | AB2 | Scotland |
| 2 | AB3 | Scotland |
| 3 | AB4 | Scotland |
| 4 | AB5 | Scotland |
# joining new data on Postcode
districtname.rename(columns={'UK region': 'UK Region New'}, inplace=True)
districtnameregion = districtname[['Postcode', 'Region', 'Town/Area', "UK Region New"]]
df_2 = pd.merge(df_1, districtnameregion, on='Postcode', how='left')
extra_propertydata_1 = extra_propertydata[['Postcode', '3yr house price growth', 'Avg. household income', 'Crime rate', 'Social rent']]
df_3 = pd.merge(df_2, extra_propertydata_1, on='Postcode', how='left')
df_3.head()
| UK region | Postcode | Avg asking price | Avg asking rent (pm) | Sales per month | Avg. bedrooms | Avg. Population | Region | Town/Area | UK Region New | 3yr house price growth | Avg. household income | Crime rate | Social rent | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | East Midlands | DE1 | 142346.0 | 628.0 | 12.0 | 2.0 | 14438 | Derby | Derby city centre | East Midlands | NaN | 33300 | 662.0 | 0.19 |
| 1 | East Midlands | DE3 | 267574.0 | 775.0 | 21.0 | 3.0 | 17094 | Derby | Mickleover | East Midlands | 0.03 | 51200 | 61.0 | 0.06 |
| 2 | East Midlands | DE4 | 310718.0 | 757.0 | 50.0 | 3.0 | 33461 | Derbyshire Dales | Darley Dale, Beeley, Rowsley, Winster, Darley ... | East Midlands | 0.06 | 45100 | 71.0 | 0.06 |
| 3 | East Midlands | DE5 | 211534.0 | 624.0 | 20.0 | 3.0 | 22075 | Amber Valley | Codnor, Denby, Ripley | East Midlands | 0.05 | 37100 | 116.0 | 0.07 |
| 4 | East Midlands | DE6 | 388940.0 | NaN | 30.0 | 3.0 | 25599 | Derbyshire Dales | Ashbourne, Hulland Ward, Weston Underwood | East Midlands | 0.09 | 46300 | 47.0 | 0.03 |
df_3.isnull().sum()
UK region 0 Postcode 0 Avg asking price 0 Avg asking rent (pm) 920 Sales per month 0 Avg. bedrooms 0 Avg. Population 0 Region 7 Town/Area 7 UK Region New 7 3yr house price growth 81 Avg. household income 0 Crime rate 57 Social rent 0 dtype: int64
df_2.head()
| UK region | Postcode | Avg asking price | Avg asking rent (pm) | Sales per month | Avg. bedrooms | Avg. Population | Region | Town/Area | UK Region New | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | East Midlands | DE1 | 142346.0 | 628.0 | 12.0 | 2.0 | 14438 | Derby | Derby city centre | East Midlands |
| 1 | East Midlands | DE3 | 267574.0 | 775.0 | 21.0 | 3.0 | 17094 | Derby | Mickleover | East Midlands |
| 2 | East Midlands | DE4 | 310718.0 | 757.0 | 50.0 | 3.0 | 33461 | Derbyshire Dales | Darley Dale, Beeley, Rowsley, Winster, Darley ... | East Midlands |
| 3 | East Midlands | DE5 | 211534.0 | 624.0 | 20.0 | 3.0 | 22075 | Amber Valley | Codnor, Denby, Ripley | East Midlands |
| 4 | East Midlands | DE6 | 388940.0 | NaN | 30.0 | 3.0 | 25599 | Derbyshire Dales | Ashbourne, Hulland Ward, Weston Underwood | East Midlands |
df_3.head()
| UK region | Postcode | Avg asking price | Avg asking rent (pm) | Sales per month | Avg. bedrooms | Avg. Population | Region | Town/Area | UK Region New | 3yr house price growth | Avg. household income | Crime rate | Social rent | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | East Midlands | DE1 | 142346.0 | 628.0 | 12.0 | 2.0 | 14438 | Derby | Derby city centre | East Midlands | NaN | 33300 | 662.0 | 0.19 |
| 1 | East Midlands | DE3 | 267574.0 | 775.0 | 21.0 | 3.0 | 17094 | Derby | Mickleover | East Midlands | 0.03 | 51200 | 61.0 | 0.06 |
| 2 | East Midlands | DE4 | 310718.0 | 757.0 | 50.0 | 3.0 | 33461 | Derbyshire Dales | Darley Dale, Beeley, Rowsley, Winster, Darley ... | East Midlands | 0.06 | 45100 | 71.0 | 0.06 |
| 3 | East Midlands | DE5 | 211534.0 | 624.0 | 20.0 | 3.0 | 22075 | Amber Valley | Codnor, Denby, Ripley | East Midlands | 0.05 | 37100 | 116.0 | 0.07 |
| 4 | East Midlands | DE6 | 388940.0 | NaN | 30.0 | 3.0 | 25599 | Derbyshire Dales | Ashbourne, Hulland Ward, Weston Underwood | East Midlands | 0.09 | 46300 | 47.0 | 0.03 |
def weighted_average_salesPM(x):
x = x.dropna()
if len(x) == 0:
return np.nan
return np.average(x, weights=df_3.loc[x.index, 'Sales per month'])
def weighted_average_population(x):
x = x.dropna()
if len(x) == 0:
return np.nan
return np.average(x, weights=df_3.loc[x.index, 'Avg. Population'])
df_3_pivot = df_3.pivot_table(index='UK Region New',
values=['Postcode', 'Avg asking price', 'Avg asking rent (pm)', 'Avg. Population', 'Sales per month', 'Avg. bedrooms'
, '3yr house price growth', 'Avg. household income', 'Crime rate', 'Social rent'],
aggfunc={
'Postcode': 'count',
'Avg asking price':weighted_average_salesPM,
'Avg asking rent (pm)': weighted_average_population,
'Avg. Population':'sum',
'Sales per month':'sum',
'Avg. bedrooms':weighted_average_salesPM,
'3yr house price growth':weighted_average_salesPM,
'Avg. household income':weighted_average_population
, 'Crime rate':weighted_average_population
, 'Social rent':weighted_average_population
}
, dropna=False
, margins=True
)
df_3_pivot.rename(columns={'Avg asking rent (pm)': 'Weighted Avg asking rent (pm) (based on population in district)', 'Avg asking price': 'Weighted avg asking price (based on sales in district)',
'Avg. Population': 'Population', 'Postcode': 'Num of Postcodes'}, inplace=True)
df_3_pivot
| 3yr house price growth | Weighted avg asking price (based on sales in district) | Weighted Avg asking rent (pm) (based on population in district) | Population | Avg. bedrooms | Avg. household income | Crime rate | Num of Postcodes | Sales per month | Social rent | |
|---|---|---|---|---|---|---|---|---|---|---|
| UK Region New | ||||||||||
| East Midlands | 0.079545 | 248843.603284 | 724.639221 | 4558393 | 2.955488 | 40589.499194 | 108.583964 | 136 | 4628.0 | 0.108776 |
| East of England | 0.066802 | 356777.068049 | 1008.661684 | 5829038 | 2.895491 | 45536.616385 | 97.856910 | 236 | 7186.0 | 0.115757 |
| London | 0.055285 | 543158.198006 | 1548.286461 | 7991857 | 2.473313 | 53481.808558 | 124.345228 | 205 | 6520.0 | 0.201236 |
| North East | 0.023679 | 159408.251898 | 548.187278 | 2568180 | 2.916034 | 35134.300594 | 140.010295 | 109 | 2108.0 | 0.173887 |
| North West | 0.092766 | 217553.169433 | 697.162662 | 6999252 | 2.916264 | 38316.145840 | 99.373416 | 245 | 6628.0 | 0.135778 |
| South East | 0.077927 | 390787.650022 | 1102.925564 | 8588274 | 2.857373 | 50358.050966 | 97.663143 | 368 | 11155.0 | 0.099675 |
| South West | 0.100465 | 317928.599455 | 950.232540 | 5267971 | 2.884268 | 41621.448998 | 82.986665 | 265 | 6973.0 | 0.091093 |
| Wales | 0.134684 | 216411.239608 | 775.188626 | 3016277 | 2.975294 | 36730.496437 | 102.827479 | 163 | 2550.0 | 0.105858 |
| West Midlands | 0.098606 | 253941.655673 | 739.595272 | 5606150 | 2.937377 | 39434.861054 | 117.486571 | 216 | 5094.0 | 0.141912 |
| Yorkshire and The Humber | 0.092786 | 209919.475436 | 666.500810 | 5261967 | 2.926512 | 38601.352536 | 130.205630 | 207 | 4926.0 | 0.126801 |
| All | 0.083285 | 321544.551891 | 982.707732 | 55837148 | 2.855078 | 43542.236126 | 110.330095 | 2157 | 57852.0 | 0.131249 |
df_3_pivot.to_excel('/Users/shaunlim/Documents/JupyterNotebooks/Preqin Case Study Interview/df_3_pivot.xlsx', index=True)
Task: Your task is to create a presentation based on the data in the dataset tab. The presentation you create will explore some observations which aren't obvious at first glance and derive insights which aren't expected. Preqin's goal is to understand how Real Estate data can tell an interesting story about where Investors should invest their money.
# Dropping districts with sales less than 20 per month, population under 20k and with no rental yield information.
df_2a = df_2[df_2['Sales per month'] >= 20]
df_2b = df_2[df_2['Avg. Population'] >= 20000]
df_2b1 = df_2a[df_2a['Avg. Population'] >= 20000]
df_2c = df_2b1.dropna(subset=['Avg asking rent (pm)'])
print('Original shape', df_2.shape)
print('Removing districts with sales under 20 per month', df_2a.shape)
print('Removing districts with population under 20k', df_2b.shape)
print('Removing districts with sales under 20 per month and population under 20k', df_2b1.shape)
print('Removing districts with sales under 20 per month, population under 20k, no rental data', df_2c.shape)
Original shape (2157, 10) Removing districts with sales under 20 per month (1351, 10) Removing districts with population under 20k (1289, 10) Removing districts with sales under 20 per month and population under 20k (1138, 10) Removing districts with sales under 20 per month, population under 20k, no rental data (944, 10)
# calculating rental yield and marketsize
df_2d = df_2c.copy()
df_2d.loc[:, 'rental yield'] = df_2c['Avg asking rent (pm)'] * 12 / df_2c['Avg asking price']
df_2d.loc[:, 'annual marketsize (m)'] = df_2c['Sales per month'] * df_2c['Avg asking price'] / 1000000 * 12
df_2d.head()
| UK region | Postcode | Avg asking price | Avg asking rent (pm) | Sales per month | Avg. bedrooms | Avg. Population | Region | Town/Area | UK Region New | rental yield | annual marketsize (m) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | East Midlands | DE4 | 310718.0 | 757.0 | 50.0 | 3.0 | 33461 | Derbyshire Dales | Darley Dale, Beeley, Rowsley, Winster, Darley ... | East Midlands | 0.029236 | 186.430800 |
| 3 | East Midlands | DE5 | 211534.0 | 624.0 | 20.0 | 3.0 | 22075 | Amber Valley | Codnor, Denby, Ripley | East Midlands | 0.035399 | 50.768160 |
| 5 | East Midlands | DE7 | 193889.0 | 615.0 | 49.0 | 3.0 | 49998 | Erewash | Horsley Woodhouse, Ilkeston, Morley, West Hallam | East Midlands | 0.038063 | 114.006732 |
| 6 | East Midlands | DE11 | 200707.0 | 652.0 | 46.0 | 3.0 | 43734 | South Derbyshire | Church Gresley, Hartshorne, Newhall, Swadlincote | East Midlands | 0.038982 | 110.790264 |
| 8 | East Midlands | DE13 | 284325.0 | 711.0 | 50.0 | 3.0 | 40249 | East Staffordshire | Alrewas, Barton-under-Needwood, Rolleston on D... | West Midlands | 0.030008 | 170.595000 |
# top_10_postcodes = df_2d.nlargest(10, 'rental yield')['Postcode']
# top_10_rows = df_2d[df_2d['Postcode'].isin(top_10_postcodes)]
# top_10_rows = df_2d.head(10)
# df_sorted = df_2d.sort_values(by='rental yield', ascending=False)
# df_sorted.drop(['Region', 'Town/Area', 'Avg. bedrooms', 'UK region'], axis = 1)
# df_sorted.head(20)
Insight / Observation:
numeric_df = df_2d.select_dtypes(include=[float, int])
correlation_matrix = numeric_df.corr()
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Correlation Heatmap')
plt.show()
# plt.scatter(df_2d['Avg asking price'], df_2d['rental yield'])
sns.scatterplot(x='Avg asking price', y='rental yield', data=df_2d, s=40)
plt.title('Scatter Plot of Avg asking price vs. rental yield')
plt.xlabel('Avg asking price')
plt.ylabel('rental yield')
plt.show()
df_2e = df_2d.copy()
df_2e.loc[:, 'yield score_'] = pd.qcut(df_2d['rental yield'], q=5, labels=[5, 4, 3, 2, 1])
df_2e.loc[:, 'liquidity score_'] = pd.qcut(df_2d['Sales per month'], q=5, labels=[5, 4, 3, 2, 1])
# [1, 2, 3, 4]
df_2f = df_2e.copy()
df_2f['yield score'] = df_2f['yield score_'].astype(int)
df_2f['liquidity score'] = df_2f['liquidity score_'].astype(int)
df_2f.drop(columns=['yield score_', 'liquidity score_'], inplace=True)
df_2f.head()
| UK region | Postcode | Avg asking price | Avg asking rent (pm) | Sales per month | Avg. bedrooms | Avg. Population | Region | Town/Area | UK Region New | rental yield | annual marketsize (m) | yield score | liquidity score | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | East Midlands | DE4 | 310718.0 | 757.0 | 50.0 | 3.0 | 33461 | Derbyshire Dales | Darley Dale, Beeley, Rowsley, Winster, Darley ... | East Midlands | 0.029236 | 186.430800 | 5 | 2 |
| 3 | East Midlands | DE5 | 211534.0 | 624.0 | 20.0 | 3.0 | 22075 | Amber Valley | Codnor, Denby, Ripley | East Midlands | 0.035399 | 50.768160 | 4 | 5 |
| 5 | East Midlands | DE7 | 193889.0 | 615.0 | 49.0 | 3.0 | 49998 | Erewash | Horsley Woodhouse, Ilkeston, Morley, West Hallam | East Midlands | 0.038063 | 114.006732 | 3 | 2 |
| 6 | East Midlands | DE11 | 200707.0 | 652.0 | 46.0 | 3.0 | 43734 | South Derbyshire | Church Gresley, Hartshorne, Newhall, Swadlincote | East Midlands | 0.038982 | 110.790264 | 3 | 2 |
| 8 | East Midlands | DE13 | 284325.0 | 711.0 | 50.0 | 3.0 | 40249 | East Staffordshire | Alrewas, Barton-under-Needwood, Rolleston on D... | West Midlands | 0.030008 | 170.595000 | 5 | 2 |
# Sense check
df_2f.pivot_table(index='yield score',
values=['rental yield', 'Avg asking price', 'Avg asking rent (pm)', 'Sales per month'],
aggfunc='mean'
, dropna=False
# , margins=True
)
| Avg asking price | Avg asking rent (pm) | Sales per month | rental yield | |
|---|---|---|---|---|
| yield score | ||||
| 1 | 193993.619048 | 778.423280 | 33.873016 | 0.048621 |
| 2 | 257304.904762 | 883.550265 | 38.793651 | 0.041263 |
| 3 | 322533.377660 | 1000.553191 | 43.180851 | 0.037327 |
| 4 | 373470.746032 | 1053.846561 | 41.417989 | 0.033900 |
| 5 | 461596.619048 | 1115.767196 | 40.597884 | 0.029504 |
df_2f.pivot_table(index='liquidity score',
values=['rental yield', 'Avg asking price', 'Avg asking rent (pm)', 'Sales per month'],
aggfunc='mean'
, dropna=False
# , margins=True
)
| Avg asking price | Avg asking rent (pm) | Sales per month | rental yield | |
|---|---|---|---|---|
| liquidity score | ||||
| 1 | 325507.916201 | 977.955307 | 63.340782 | 0.036963 |
| 2 | 321045.179191 | 958.184971 | 45.283237 | 0.037115 |
| 3 | 332268.210526 | 966.851675 | 37.598086 | 0.036675 |
| 4 | 309531.592391 | 927.451087 | 30.701087 | 0.038062 |
| 5 | 319370.984925 | 998.648241 | 23.487437 | 0.041625 |
# creating a master score
df_2g = df_2f.copy()
df_2g.loc[:, 'yield and liquidity score'] = (df_2g['yield score'] + df_2g['liquidity score']) - 1
df_2g.head()
| UK region | Postcode | Avg asking price | Avg asking rent (pm) | Sales per month | Avg. bedrooms | Avg. Population | Region | Town/Area | UK Region New | rental yield | annual marketsize (m) | yield score | liquidity score | yield and liquidity score | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | East Midlands | DE4 | 310718.0 | 757.0 | 50.0 | 3.0 | 33461 | Derbyshire Dales | Darley Dale, Beeley, Rowsley, Winster, Darley ... | East Midlands | 0.029236 | 186.430800 | 5 | 2 | 6 |
| 3 | East Midlands | DE5 | 211534.0 | 624.0 | 20.0 | 3.0 | 22075 | Amber Valley | Codnor, Denby, Ripley | East Midlands | 0.035399 | 50.768160 | 4 | 5 | 8 |
| 5 | East Midlands | DE7 | 193889.0 | 615.0 | 49.0 | 3.0 | 49998 | Erewash | Horsley Woodhouse, Ilkeston, Morley, West Hallam | East Midlands | 0.038063 | 114.006732 | 3 | 2 | 4 |
| 6 | East Midlands | DE11 | 200707.0 | 652.0 | 46.0 | 3.0 | 43734 | South Derbyshire | Church Gresley, Hartshorne, Newhall, Swadlincote | East Midlands | 0.038982 | 110.790264 | 3 | 2 | 4 |
| 8 | East Midlands | DE13 | 284325.0 | 711.0 | 50.0 | 3.0 | 40249 | East Staffordshire | Alrewas, Barton-under-Needwood, Rolleston on D... | West Midlands | 0.030008 | 170.595000 | 5 | 2 | 6 |
df_2g.pivot_table(index='yield and liquidity score',
values=['rental yield', 'Avg asking price', 'Avg asking rent (pm)', 'Sales per month'],
aggfunc='mean'
, dropna=False
# , margins=True
)
| Avg asking price | Avg asking rent (pm) | Sales per month | rental yield | |
|---|---|---|---|---|
| yield and liquidity score | ||||
| 1 | 217170.666667 | 853.285714 | 60.333333 | 0.046619 |
| 2 | 241661.508772 | 872.684211 | 57.122807 | 0.043958 |
| 3 | 267403.407407 | 896.092593 | 50.805556 | 0.041218 |
| 4 | 284996.629630 | 914.728395 | 44.839506 | 0.039825 |
| 5 | 283957.232068 | 905.510549 | 36.759494 | 0.040551 |
| 6 | 333211.881579 | 972.368421 | 34.125000 | 0.035810 |
| 7 | 401731.284483 | 1073.637931 | 32.474138 | 0.032580 |
| 8 | 446368.671875 | 1142.125000 | 27.718750 | 0.031085 |
| 9 | 639282.296296 | 1466.814815 | 23.666667 | 0.028848 |
# Creating a absolute ranking based on district liquidity, yield and a combination of both
df_2h = df_2g.copy()
df_2h['rental yield rank'] = df_2h['rental yield'].rank(method='dense', ascending=False).astype(int)
df_2h['liquidity rank'] = df_2h['Sales per month'].rank(method='dense', ascending=False).astype(int)
df_2h['rank_'] = df_2h['rental yield rank'] + df_2h['liquidity rank']
df_2h['rank'] = df_2h['rank_'].rank(method='dense', ascending=True).astype(int)
df_2h
| UK region | Postcode | Avg asking price | Avg asking rent (pm) | Sales per month | Avg. bedrooms | Avg. Population | Region | Town/Area | UK Region New | rental yield | annual marketsize (m) | yield score | liquidity score | yield and liquidity score | rental yield rank | liquidity rank | rank_ | rank | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | East Midlands | DE4 | 310718.0 | 757.0 | 50.0 | 3.0 | 33461 | Derbyshire Dales | Darley Dale, Beeley, Rowsley, Winster, Darley ... | East Midlands | 0.029236 | 186.430800 | 5 | 2 | 6 | 873 | 44 | 917 | 548 |
| 3 | East Midlands | DE5 | 211534.0 | 624.0 | 20.0 | 3.0 | 22075 | Amber Valley | Codnor, Denby, Ripley | East Midlands | 0.035399 | 50.768160 | 4 | 5 | 8 | 576 | 74 | 650 | 378 |
| 5 | East Midlands | DE7 | 193889.0 | 615.0 | 49.0 | 3.0 | 49998 | Erewash | Horsley Woodhouse, Ilkeston, Morley, West Hallam | East Midlands | 0.038063 | 114.006732 | 3 | 2 | 4 | 434 | 45 | 479 | 276 |
| 6 | East Midlands | DE11 | 200707.0 | 652.0 | 46.0 | 3.0 | 43734 | South Derbyshire | Church Gresley, Hartshorne, Newhall, Swadlincote | East Midlands | 0.038982 | 110.790264 | 3 | 2 | 4 | 388 | 48 | 436 | 245 |
| 8 | East Midlands | DE13 | 284325.0 | 711.0 | 50.0 | 3.0 | 40249 | East Staffordshire | Alrewas, Barton-under-Needwood, Rolleston on D... | West Midlands | 0.030008 | 170.595000 | 5 | 2 | 6 | 854 | 44 | 898 | 533 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2143 | West Midlands | WV3 | 219787.0 | 616.0 | 21.0 | 3.0 | 29663 | Wolverhampton | Finchfield, Compton, Castlecroft | West Midlands | 0.033633 | 55.386324 | 4 | 5 | 8 | 682 | 73 | 755 | 447 |
| 2146 | West Midlands | WV6 | 242356.0 | 712.0 | 38.0 | 3.0 | 44689 | Wolverhampton | Whitmore Reans, Perton, Pattingham, Tettenhall | West Midlands | 0.035254 | 110.514336 | 4 | 3 | 6 | 588 | 56 | 644 | 374 |
| 2150 | West Midlands | WV10 | 182142.0 | 781.0 | 27.0 | 3.0 | 56258 | Wolverhampton | Low Hill, Bushbury, Heath Town, Fordhouses, Fa... | West Midlands | 0.051454 | 59.014008 | 1 | 5 | 5 | 33 | 67 | 100 | 27 |
| 2151 | West Midlands | WV11 | 199076.0 | 663.0 | 24.0 | 3.0 | 34832 | Wolverhampton | Wednesfield | West Midlands | 0.039965 | 57.333888 | 2 | 5 | 6 | 334 | 70 | 404 | 221 |
| 2154 | West Midlands | WV14 | 176453.0 | 670.0 | 27.0 | 3.0 | 46931 | Wolverhampton | Bradley, Bilston Town | West Midlands | 0.045565 | 57.170772 | 1 | 5 | 5 | 129 | 67 | 196 | 93 |
944 rows × 19 columns
df_2i = df_2h.sort_values(by='rank').head(20).copy()
import plotly.express as px
# Creating the interactive bubble chart with tooltips
fig = px.scatter(df_2i, x='rental yield', y='Sales per month', size='annual marketsize (m)', color='rank', hover_name='Region',
hover_data={'rank': True, 'rental yield': True, 'Sales per month': True, 'annual marketsize (m)': True,
'UK Region New': True,
'Postcode': True,
'Avg. Population': True,
'Avg asking price': True
},
size_max=50
)
# Updating layout
fig.update_layout(title='Top 20 Districts - Bubble Chart',
xaxis_title='Rental Yield',
yaxis_title='Sales per month',
plot_bgcolor='white', # Set background color to white
xaxis=dict(showgrid=True, gridcolor='grey'), # Show x-axis gridlines and set color to grey
yaxis=dict(showgrid=True, gridcolor='grey') # Show y-axis gridlines and set color to grey
)
# Showing the plot
fig.show()
# Source / Notes
# Medium article for heatmaps -- https://focaalvarez.medium.com/mapping-the-uk-and-navigating-the-post-code-maze-4898e758b82f
# Enriching postcode data -- https://www.doogal.co.uk/PostcodeDownloads
# geodata converter -- https://mygeodata.cloud
# Geojson Data
# District / Postcode Geojson source -- topojson source -- https://www.opendoorlogistics.com/wp-content/uploads/Data/UK-postcode-boundaries-Jan-2015-topojson/Districts.json
# Region Geojson -- https://www.kaggle.com/datasets/dorianlazar/uk-regions-geojson?resource=download
import geopandas as gpd
import folium
# importing geodata
gdf = gpd.read_file('/Users/shaunlim/Downloads/mygeodata/newdata.geojson')
gdf_1 = gdf[['Postcode', 'geometry']].copy()
gdf_2 = pd.merge(df_2h, gdf_1, on='Postcode', how='left')
gdf_3 = gdf_2.copy()
gdf_3.dropna(subset=['geometry'], inplace=True)
print('gdf_2', gdf_2.shape)
print('df_3a', gdf_3.shape)
gdf_2 (944, 20) df_3a (942, 20)
gdf_4 = gpd.GeoDataFrame(gdf_3, geometry='geometry')
gdf_4.head()
| UK region | Postcode | Avg asking price | Avg asking rent (pm) | Sales per month | Avg. bedrooms | Avg. Population | Region | Town/Area | UK Region New | rental yield | annual marketsize (m) | yield score | liquidity score | yield and liquidity score | rental yield rank | liquidity rank | rank_ | rank | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | East Midlands | DE4 | 310718.0 | 757.0 | 50.0 | 3.0 | 33461 | Derbyshire Dales | Darley Dale, Beeley, Rowsley, Winster, Darley ... | East Midlands | 0.029236 | 186.430800 | 5 | 2 | 6 | 873 | 44 | 917 | 548 | POLYGON ((-1.48202 53.06879, -1.49092 53.07340... |
| 1 | East Midlands | DE5 | 211534.0 | 624.0 | 20.0 | 3.0 | 22075 | Amber Valley | Codnor, Denby, Ripley | East Midlands | 0.035399 | 50.768160 | 4 | 5 | 8 | 576 | 74 | 650 | 378 | POLYGON ((-1.38623 53.01340, -1.39228 53.00587... |
| 2 | East Midlands | DE7 | 193889.0 | 615.0 | 49.0 | 3.0 | 49998 | Erewash | Horsley Woodhouse, Ilkeston, Morley, West Hallam | East Midlands | 0.038063 | 114.006732 | 3 | 2 | 4 | 434 | 45 | 479 | 276 | POLYGON ((-1.28005 52.93673, -1.28438 52.93458... |
| 3 | East Midlands | DE11 | 200707.0 | 652.0 | 46.0 | 3.0 | 43734 | South Derbyshire | Church Gresley, Hartshorne, Newhall, Swadlincote | East Midlands | 0.038982 | 110.790264 | 3 | 2 | 4 | 388 | 48 | 436 | 245 | POLYGON ((-1.56805 52.79016, -1.56619 52.79055... |
| 4 | East Midlands | DE13 | 284325.0 | 711.0 | 50.0 | 3.0 | 40249 | East Staffordshire | Alrewas, Barton-under-Needwood, Rolleston on D... | West Midlands | 0.030008 | 170.595000 | 5 | 2 | 6 | 854 | 44 | 898 | 533 | POLYGON ((-1.62899 52.82719, -1.63047 52.82704... |
from folium.plugins import HeatMap
m = folium.Map(location=[52.38, -1.6], zoom_start = 7)
# # Create a HeatMap layer with rainbow color gradient
# heat_map = HeatMap(gdf_3[["Avg. sold price"]], gradient={0.0: 'blue', 1.0: 'red'}, radius=15, blur=10)
# # Add the HeatMap layer to the map
# m.add_child(heat_map)
cp = folium.Choropleth(
geo_data=gdf_4,
data=gdf_4,
columns=["Postcode", 'yield and liquidity score'],
key_on="feature.properties.Postcode",
fill_color='RdGy', fill_opacity=0.85, line_opacity=0.2,
legend_name='LEGEND',highlight=True
).add_to(m)
folium.GeoJsonTooltip(['yield and liquidity score', 'UK Region New', 'Region', "Postcode", "Avg. Population", "rental yield", "annual marketsize (m)",
"Sales per month"]).add_to(cp.geojson)
display(m)
from folium.plugins import HeatMap
m = folium.Map(location=[52.38, -1.6], zoom_start = 7)
# # Create a HeatMap layer with rainbow color gradient
# heat_map = HeatMap(gdf_3[["Avg. sold price"]], gradient={0.0: 'blue', 1.0: 'red'}, radius=15, blur=10)
# # Add the HeatMap layer to the map
# m.add_child(heat_map)
cp = folium.Choropleth(
geo_data=gdf_4,
data=gdf_4,
columns=["Postcode", 'rental yield rank'],
key_on="feature.properties.Postcode",
fill_color='RdGy', fill_opacity=0.85, line_opacity=0.2,
legend_name='LEGEND',highlight=True
).add_to(m)
folium.GeoJsonTooltip(['yield and liquidity score', 'yield score', 'rental yield rank', 'UK Region New', 'Region', "Postcode", "Avg. Population", "rental yield", "annual marketsize (m)",
"Sales per month"]).add_to(cp.geojson)
display(m)
df_presentation = df_1.drop(columns=['Avg. bedrooms'], inplace=False)
df_presentation.loc[:, 'rental yield'] = df_presentation['Avg asking rent (pm)'] * 12 / df_presentation['Avg asking price']
df_presentation.loc[:, 'annual marketsize (m)'] = df_presentation['Sales per month'] * df_presentation['Avg asking price'] / 1000000 * 12
# df_2d.head()
plt.figure(figsize=(12, 8))
df_presentation.hist(bins=30, figsize=(15, 10))
plt.show()
<Figure size 1200x800 with 0 Axes>
df_presentation.head()
| UK region | Postcode | Avg asking price | Avg asking rent (pm) | Sales per month | Avg. Population | rental yield | annual marketsize (m) | |
|---|---|---|---|---|---|---|---|---|
| 0 | East Midlands | DE1 | 142346.0 | 628.0 | 12.0 | 14438 | 0.052941 | 20.497824 |
| 1 | East Midlands | DE3 | 267574.0 | 775.0 | 21.0 | 17094 | 0.034757 | 67.428648 |
| 2 | East Midlands | DE4 | 310718.0 | 757.0 | 50.0 | 33461 | 0.029236 | 186.430800 |
| 3 | East Midlands | DE5 | 211534.0 | 624.0 | 20.0 | 22075 | 0.035399 | 50.768160 |
| 4 | East Midlands | DE6 | 388940.0 | NaN | 30.0 | 25599 | NaN | 140.018400 |
df_presentation.describe()
| Avg asking price | Avg asking rent (pm) | Sales per month | Avg. Population | rental yield | annual marketsize (m) | |
|---|---|---|---|---|---|---|
| count | 2.157000e+03 | 1237.000000 | 2157.000000 | 2157.000000 | 1237.000000 | 2157.000000 |
| mean | 3.172622e+05 | 986.696847 | 26.820584 | 25886.484933 | 0.039064 | 103.488153 |
| std | 1.658720e+05 | 412.241569 | 16.586642 | 15938.795728 | 0.008987 | 86.590487 |
| min | 5.241200e+04 | 394.000000 | 2.000000 | 865.000000 | 0.019681 | 2.192592 |
| 25% | 2.063750e+05 | 691.000000 | 14.000000 | 14025.000000 | 0.032826 | 43.680120 |
| 50% | 2.860650e+05 | 913.000000 | 24.000000 | 23744.000000 | 0.037743 | 80.311680 |
| 75% | 3.889400e+05 | 1188.000000 | 36.000000 | 34620.000000 | 0.043740 | 140.018400 |
| max | 2.147277e+06 | 3765.000000 | 119.000000 | 153811.000000 | 0.117225 | 871.542060 |
# Creating a figure with two subplots
fig, axes = plt.subplots(1, 2, figsize=(8, 6))
# Boxplot for Variable 1
sns.boxplot(y='rental yield', data=df_presentation, ax=axes[0])
axes[0].set_title('rental yield')
# Boxplot for Variable 2
sns.boxplot(y='annual marketsize (m)', data=df_presentation, ax=axes[1])
axes[1].set_title('annual marketsize (m)')
# Show plot
plt.tight_layout()
plt.show()
df_presentation_1 = df_2h.sort_values(by='rank').head(20).copy()
df_presentation_1.set_index('rank', inplace=True)
df_presentation_1.reset_index(drop=True, inplace=True)
df_presentation_1.index = df_presentation_1.index + 1
df_presentation_1[['UK Region New', 'Town/Area', 'Postcode', 'rental yield', 'annual marketsize (m)', 'Sales per month']]
| UK Region New | Town/Area | Postcode | rental yield | annual marketsize (m) | Sales per month | |
|---|---|---|---|---|---|---|
| 1 | South East | Brighton, Bevendean, Brighton Marina, Kemptown... | BN2 | 0.055823 | 486.188640 | 105.0 |
| 2 | North West | City Centre, Avenham, Broadgate, Deepdale, Fis... | PR1 | 0.060462 | 74.058300 | 45.0 |
| 3 | South East | Southsea | PO4 | 0.055816 | 153.023256 | 51.0 |
| 4 | North West | Barrow-in-Furness, Walney Island | LA14 | 0.057356 | 61.103688 | 43.0 |
| 5 | Wales | Bonymaen, Copper Quarter, Crymlyn Burrows, Dan... | SA1 | 0.072094 | 50.034960 | 30.0 |
| 6 | Wales | Pontypridd, Cilfynydd, Glyncoch, Graig, Trefor... | CF37 | 0.080981 | 42.210792 | 26.0 |
| 7 | North East | Dipton, Stanley | DH9 | 0.060805 | 34.285008 | 31.0 |
| 8 | West Midlands | Walsgrave, Wyken, Stoke, Bell Green, Wood End,... | CV2 | 0.054854 | 102.270168 | 43.0 |
| 9 | East of England | Colchester | CO2 | 0.053216 | 125.594352 | 46.0 |
| 10 | South East | Southsea | PO5 | 0.061391 | 67.206360 | 26.0 |
| 11 | North East | Ayres Quay, Barnes, Chester Road, Deptford, Fo... | SR4 | 0.061556 | 28.050876 | 21.0 |
| 12 | Yorkshire and The Humber | Belle Isle, Hunslet, Middleton, Stourton | LS10 | 0.061503 | 43.113576 | 22.0 |
| 13 | West Midlands | Tile Hill, Canley, Cannon Park, Lime Tree Park... | CV4 | 0.057836 | 88.960680 | 30.0 |
| 14 | North East | Easington, Easington Colliery, Horden, Little ... | SR8 | 0.059249 | 28.610820 | 27.0 |
| 15 | Yorkshire and The Humber | Hull, Marfleet | HU9 | 0.061479 | 24.781200 | 20.0 |
| 16 | North West | Nelson, Barrowford, Blacko, Brierfield, Higher... | BB9 | 0.052760 | 57.659616 | 42.0 |
| 17 | North East | South Shields Town Centre, Deans, High Shields | NE33 | 0.060390 | 28.354308 | 23.0 |
| 18 | North East | Walker, Byker, Heaton | NE6 | 0.060303 | 38.168640 | 24.0 |
| 19 | North East | Gateshead, Bensham | NE8 | 0.058352 | 32.920452 | 23.0 |
| 20 | North West | Middleton, Alkrington | M24 | 0.051100 | 99.424032 | 46.0 |
df_2h.head()
| UK region | Postcode | Avg asking price | Avg asking rent (pm) | Sales per month | Avg. bedrooms | Avg. Population | Region | Town/Area | UK Region New | rental yield | annual marketsize (m) | yield score | liquidity score | yield and liquidity score | rental yield rank | liquidity rank | rank_ | rank | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | East Midlands | DE4 | 310718.0 | 757.0 | 50.0 | 3.0 | 33461 | Derbyshire Dales | Darley Dale, Beeley, Rowsley, Winster, Darley ... | East Midlands | 0.029236 | 186.430800 | 5 | 2 | 6 | 873 | 44 | 917 | 548 |
| 3 | East Midlands | DE5 | 211534.0 | 624.0 | 20.0 | 3.0 | 22075 | Amber Valley | Codnor, Denby, Ripley | East Midlands | 0.035399 | 50.768160 | 4 | 5 | 8 | 576 | 74 | 650 | 378 |
| 5 | East Midlands | DE7 | 193889.0 | 615.0 | 49.0 | 3.0 | 49998 | Erewash | Horsley Woodhouse, Ilkeston, Morley, West Hallam | East Midlands | 0.038063 | 114.006732 | 3 | 2 | 4 | 434 | 45 | 479 | 276 |
| 6 | East Midlands | DE11 | 200707.0 | 652.0 | 46.0 | 3.0 | 43734 | South Derbyshire | Church Gresley, Hartshorne, Newhall, Swadlincote | East Midlands | 0.038982 | 110.790264 | 3 | 2 | 4 | 388 | 48 | 436 | 245 |
| 8 | East Midlands | DE13 | 284325.0 | 711.0 | 50.0 | 3.0 | 40249 | East Staffordshire | Alrewas, Barton-under-Needwood, Rolleston on D... | West Midlands | 0.030008 | 170.595000 | 5 | 2 | 6 | 854 | 44 | 898 | 533 |
df_presentation_2 = df_2h.sort_values(by='rental yield rank').head(10).copy()
df_presentation_2.set_index('rental yield rank', inplace=True)
df_presentation_2[['UK Region New', 'Town/Area', 'Postcode', 'rental yield', 'annual marketsize (m)', 'Sales per month']]
| UK Region New | Town/Area | Postcode | rental yield | annual marketsize (m) | Sales per month | |
|---|---|---|---|---|---|---|
| rental yield rank | ||||||
| 1 | Wales | Pontypridd, Cilfynydd, Glyncoch, Graig, Trefor... | CF37 | 0.080981 | 42.210792 | 26.0 |
| 2 | Wales | Bonymaen, Copper Quarter, Crymlyn Burrows, Dan... | SA1 | 0.072094 | 50.034960 | 30.0 |
| 3 | North East | Ayres Quay, Barnes, Chester Road, Deptford, Fo... | SR4 | 0.061556 | 28.050876 | 21.0 |
| 4 | Yorkshire and The Humber | Belle Isle, Hunslet, Middleton, Stourton | LS10 | 0.061503 | 43.113576 | 22.0 |
| 5 | Yorkshire and The Humber | Hull, Marfleet | HU9 | 0.061479 | 24.781200 | 20.0 |
| 6 | South East | Southsea | PO5 | 0.061391 | 67.206360 | 26.0 |
| 7 | North East | Dipton, Stanley | DH9 | 0.060805 | 34.285008 | 31.0 |
| 8 | North West | City Centre, Avenham, Broadgate, Deepdale, Fis... | PR1 | 0.060462 | 74.058300 | 45.0 |
| 9 | North East | South Shields Town Centre, Deans, High Shields | NE33 | 0.060390 | 28.354308 | 23.0 |
| 10 | North East | Walker, Byker, Heaton | NE6 | 0.060303 | 38.168640 | 24.0 |
df_presentation_3 = df_2h.sort_values(by='liquidity rank').head(10).copy()
df_presentation_3.set_index('liquidity rank', inplace=True)
df_presentation_3.reset_index(drop=True, inplace=True)
df_presentation_3.index = df_presentation_3.index + 1
df_presentation_3[['UK Region New', 'Town/Area', 'Postcode', 'rental yield', 'annual marketsize (m)', 'Sales per month']]
| UK Region New | Town/Area | Postcode | rental yield | annual marketsize (m) | Sales per month | |
|---|---|---|---|---|---|---|
| 1 | South East | Hove, Hangleton, West Blatchington | BN3 | 0.042149 | 546.415632 | 119.0 |
| 2 | London | Croydon, Addiscombe, Shirley, Addington, New A... | CR0 | 0.041558 | 487.381536 | 118.0 |
| 3 | South East | Brighton, Coldean, Falmer, Hollingbury, Patcha... | BN1 | 0.042618 | 563.736888 | 117.0 |
| 4 | London | Walthamstow, Upper Walthamstow | E17 | 0.035908 | 662.513904 | 117.0 |
| 5 | South East | Brighton, Bevendean, Brighton Marina, Kemptown... | BN2 | 0.055823 | 486.188640 | 105.0 |
| 6 | South West | Downend, Fishponds, Frenchay | BS16 | 0.043153 | 388.702008 | 102.0 |
| 7 | South West | Christchurch | BH23 | 0.036513 | 452.933352 | 97.0 |
| 8 | London | Wandsworth, Southfields, Earlsfield | SW18 | 0.034494 | 679.256760 | 91.0 |
| 9 | East of England | Leighton Buzzard, Bragenham, Briggington, Burc... | LU7 | 0.031615 | 384.108480 | 90.0 |
| 10 | East Midlands | East Leake, West Leake, Sutton Bonington, Moun... | LE12 | 0.031287 | 297.415800 | 90.0 |
df_presentation_4 = df.copy()
df_presentation_4.loc[:, 'annual marketsize (m)'] = df['Sales per month'] * df['Avg asking price'] / 1000000 * 12
df_presentation_4
| UK region | Postcode | Avg asking price | Avg asking rent (pm) | Sales per month | Avg. bedrooms | Avg. Population | annual marketsize (m) | |
|---|---|---|---|---|---|---|---|---|
| 0 | East Midlands | DE1 | 142346.0 | 628.0 | 12.0 | 2.0 | 14438 | 20.497824 |
| 1 | East Midlands | DE3 | 267574.0 | 775.0 | 21.0 | 3.0 | 17094 | 67.428648 |
| 2 | East Midlands | DE4 | 310718.0 | 757.0 | 50.0 | 3.0 | 33461 | 186.430800 |
| 3 | East Midlands | DE5 | 211534.0 | 624.0 | 20.0 | 3.0 | 22075 | 50.768160 |
| 4 | East Midlands | DE6 | 388940.0 | NaN | 30.0 | 3.0 | 25599 | 140.018400 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2180 | West Midlands | WV12 | 192855.0 | NaN | 19.0 | 3.0 | 25417 | 43.970940 |
| 2181 | West Midlands | WV13 | 161442.0 | NaN | 10.0 | 3.0 | 20899 | 19.373040 |
| 2182 | West Midlands | WV14 | 176453.0 | 670.0 | 27.0 | 3.0 | 46931 | 57.170772 |
| 2183 | West Midlands | WV15 | 301648.0 | NaN | 9.0 | 3.0 | 8412 | 32.577984 |
| 2184 | West Midlands | WV16 | 252390.0 | NaN | 25.0 | 3.0 | 17634 | 75.717000 |
2185 rows × 8 columns
df_presentation_4['annual marketsize (m)'].sum()
223223.944992